Load the packages used to simulate and manipulate the data. You need the pacman package installed; if you do not have it, run `install.packages("pacman")` first.
pacman::p_load(tidyverse, MASS, psych, lavaan)
pacman::p_load_gh("tidyverse/multidplyr")
Data are generated by simulating multivariate data under the following conditions: - Inter-item correlation: 0.10, 0.15, 0.20, or 0.25 - Number of observations: 50, 100, 250, 500, or 1000 - Number of items: 3 to 12 - Number of replications: 1,000 per condition
This yields 200 distinct conditions (4 correlations × 5 sample sizes × 10 item counts); with 1,000 replications of each, a total of 200,000 simulated dataframes are taken for analysis.
Create the variables that define the simulation conditions.
# Simulation conditions ----
set.seed(2019)                                    # reproducibility
r      <- seq(from = 0.10, to = 0.25, by = 0.05)  # inter-item correlations
n      <- c(50, 100, 250, 500, 1000)              # sample sizes
replic <- 1000                                    # replications per condition
A quadruple for loop generates the data: 10 item counts × 4 correlations × 5 sample sizes × 1,000 replications. This code is computationally intensive and consumes approximately 4.8 GB of RAM.
# Create blank lists
sigma <- list() # Correlation matrices, one per (item count, correlation) pair
items <- list() # Simulated data sets

# Quadruple loop: item count (i + 2 = 3..12 items) x correlation x
# sample size x replication. mvrnorm() call order is unchanged, so the
# set.seed(2019) stream reproduces the original data exactly.
for (i in seq_len(10)) { # 10 different quantity of items
  sigma[[i]] <- list()
  items[[i]] <- list()
  for (j in seq_along(r)) { # Change according to the different correlation matrices
    # Compound-symmetric matrix: 1 on the diagonal, r[j] everywhere else.
    # The recycled vector c(1, rep(r[j], i + 2)) has length i + 3, so each
    # wrap shifts the 1 down one row, tracing the diagonal.
    sigma[[i]][[j]] <- matrix(data = rep(c(1, rep(r[j], i+2)), i+2),
                              nrow = i+2,
                              ncol = i+2)
    items[[i]][[j]] <- list()
    for (k in seq_along(n)) { # Change based on sample size
      items[[i]][[j]][[k]] <- list()
      for (l in seq_len(replic)) { # Change based on number of replications (1,000)
        draws <- mvrnorm(n = n[k],
                         mu = rep(0, i+2),
                         Sigma = sigma[[i]][[j]])
        # Name the columns V1..Vp up front: as_tibble() on an unnamed matrix
        # triggers the tibble 2.0.0 compatibility .name_repair warning, and
        # the compatibility repair produced exactly these V-names anyway.
        colnames(draws) <- paste0("V", seq_len(ncol(draws)))
        items[[i]][[j]][[k]][[l]] <- draws %>%
          as_tibble() %>%
          # Symmetric 5-point scaling of each item (cut points at -2, -1, 1, 2).
          # across() replaces the superseded mutate_all(); the default
          # "{.col}_{.fn}" naming yields the same V1_Item, ... columns.
          mutate(across(everything(),
                        list(Item = ~ findInterval(.x, c(-Inf, -2, -1, 1, 2, Inf)))))
      }
    }
  }
}
Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
Using compatibility `.name_repair`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Drop the temporary loop indices now that the simulation is done
rm(list = c("i", "j", "k", "l"))
The nested lists created above are now combined into a single table, with columns identifying the sample size, the correlation, and the replication number used to simulate each data set.
# Flatten the nested lists into one nested tibble, labelling every simulated
# data set with its condition (item count, correlation, sample size) and
# replication number. Each bind_rows(.id = ...) turns a list position into a
# character index column, which is then recoded to the real condition value.
temp <- items
items <- list()
for (a in seq_len(10)) {
  items[[a]] <- list()
  for (b in seq_along(r)) {
    items[[a]][[b]] <- list()
    for (d in seq_along(n)) {
      # Stack the 1,000 replications; "replic" records the replication number.
      items[[a]][[b]][[d]] <- bind_rows(temp[[a]][[b]][[d]], .id = "replic")
    }
    # Stack sample sizes and map the list index back to the actual n.
    items[[a]][[b]] <- items[[a]][[b]] %>%
      bind_rows(.id = "n") %>%
      mutate(n = recode(n, "1" = 50, "2" = 100,
                        "3" = 250, "4" = 500,
                        "5" = 1000))
  }
  # Stack correlations, map index -> correlation value, then nest one tibble
  # per (correlation, n, replic) cell.
  items[[a]] <- items[[a]] %>%
    bind_rows(.id = "correlation") %>%
    mutate(correlation = recode(correlation, "1" = 0.1,
                                "2" = 0.15, "3" = 0.2,
                                "4" = 0.25)) %>%
    group_nest(correlation, n, replic)
}
# Finally stack the item counts and give them readable labels.
items <- items %>%
  bind_rows(.id = "items") %>%
  mutate(items = recode(items, "1" = "3 items",
                        "2" = "4 items", "3" = "5 items",
                        "4" = "6 items", "5" = "7 items",
                        "6" = "8 items", "7" = "9 items",
                        "8" = "10 items", "9" = "11 items",
                        "10" = "12 items"))
items
The multidplyr package is used so that the computation runs in parallel across all processor cores.
# Spin up one worker per core; partition() ships the nested data to the
# workers and collect() brings the augmented rows back.
cluster <- new_cluster(parallel::detectCores())

items <- items %>%
  partition(cluster) %>%
  # Fit psych::alpha() on each nested data set.
  # NOTE(review): the *_Item columns are dropped, so alpha is computed on the
  # continuous V columns rather than the scaled items — confirm intended.
  mutate(alfa_psych = purrr::map(
    data,
    ~ psych::alpha(dplyr::select(., -dplyr::ends_with("Item")))
  )) %>%
  # Extract the raw alpha coefficient from every fit.
  mutate(alfa_coef = purrr::map_dbl(
    alfa_psych,
    ~ purrr::pluck(.x, "total", "raw_alpha")
  )) %>%
  collect()
items